__author__ = 'Jakub Narloch'

import os
from WEDT.Core import DocumentReader

class DocumentProcessor:
    """Reads every document found under a two-level ``<root>/<category>/<document>`` tree.

    The actual reading is delegated to a reader object exposing
    ``read(documentPath)``. An optional progress listener exposing
    ``notifyProgressStarted(message, total)`` and ``notifyProgress(index)``
    is informed while the documents are being read.
    """

    # Class-level fallbacks so the attributes exist even before they are set.
    __documentReader = None
    __progressListener = None

    def __init__(self, documentReader=None):
        """Create a processor.

        documentReader -- object with a ``read(documentPath)`` method. When
            omitted (or None) a new ``DocumentReader()`` is created for this
            instance. The default is resolved here rather than in the
            signature, because a default expression in the signature is
            evaluated only once, at definition time, and would then be shared
            by every processor.
        """
        if documentReader is None:
            documentReader = DocumentReader()
        self.__documentReader = documentReader

    def setProgressListener(self, progressListener):
        """Register the listener notified by processDocuments (None disables it)."""
        self.__progressListener = progressListener

    def processDocuments(self, path):
        """Read all documents stored in the category sub-directories of ``path``.

        path -- root directory; each of its sub-directories is treated as a
            category and each entry inside a category as a document.
            Entries named '.svn' are skipped on both levels.

        Returns a list with the result of ``processDocument`` for every
        document, in the order the paths were discovered (``os.listdir``
        order, which is not guaranteed to be sorted).
        """
        documentPaths = self.__collectDocumentPaths(path)
        listener = self.__progressListener

        if listener is not None:
            # Runtime message kept verbatim (Polish for "Loading files").
            listener.notifyProgressStarted('Wczytywanie plikow', len(documentPaths))

        documents = []
        for index, documentPath in enumerate(documentPaths, 1):
            # The 1-based index is reported before the document is read.
            if listener is not None:
                listener.notifyProgress(index)
            documents.append(self.processDocument(documentPath))

        return documents

    def processDocument(self, documentPath):
        """Read a single document and return whatever the reader returns for it."""
        return self.__documentReader.read(documentPath)

    def __collectDocumentPaths(self, path):
        """Return the paths of all entries inside the category directories of ``path``."""
        documentPaths = []
        for categoryDir in os.listdir(path):
            # Skip version-control metadata up front so it is never listed.
            if categoryDir == '.svn':
                continue
            categoryPath = os.path.join(path, categoryDir)
            # Plain files placed directly in the root are not categories.
            if not os.path.isdir(categoryPath):
                continue
            for document in os.listdir(categoryPath):
                if document != '.svn':
                    documentPaths.append(os.path.join(categoryPath, document))
        return documentPaths


